/* 
    Purpose: Using the 1950 Census, this file takes cleaned variables 
             from 2c and calculates average predicted mother income 
             (i.e., "income scores") at ONLY the occupation x race x south
             level.

    Note: Will create templates/income scores at occ and occ x race levels 
          solely to impute income for missing occ x race x south cells.

    Creates: incomescores_mothers1950_byocc_byr_bys.dta
*/
clear
set more off
cd "$Mydirectory1/1_DataSources/CensusData/"

*------------------------------------------------------------------------------------*
*------------------------------------------------------------------------------------*

*******************
*** TEMPLATES
*******************

/* Note: The census microdata from 2c does not contain all 28 coarsened
         occupations, so every collapsed file below is merged onto a
         template built from the complete occupation list loaded here.
         One template is saved per cell definition; later code refers to
         them as template_<level>.
*/
use ./code/OtherCensus_RawData/motheroccej_template_occs.dta, clear

* Template 1 (occupation): the occupation list exactly as loaded
tempfile template_byocc
save `template_byocc'

* Templates 2 and 3 are built one on top of the other; the preserve/restore
* pair puts the plain occupation list back in memory afterwards.
preserve

    * Template 2 (occupation x race): duplicate every row. expand's 0/1
    * duplicate flag is shifted by one so that race takes the values 1 and 2.
    expand 2, generate(race)
    replace race = race + 1

    tempfile template_byocc_byr
    save `template_byocc_byr'

    * Template 3 (occupation x race x south): duplicate the occ x race rows.
    * south_merge keeps expand's 0/1 coding.
    expand 2, generate(south_merge)

    tempfile template_byocc_byr_bys
    save `template_byocc_byr_bys'

restore

*------------------------------------------------------------------------------------*
*------------------------------------------------------------------------------------*

    * Load the cleaned 1950 mothers sample. `number' is a constant 1 that the
    * collapse below sums, unweighted, into a per-cell observation count.
    use ./output/Census1950_mothers_ages30to50.dta, clear
    generate number = 1

    * Income variables to average; also read by the imputation step further down
    global income_measures "inctot"

*******************
*** COLLAPSE
*******************

    * By-variables that define a cell at each aggregation level
    local cell_byocc         "motheroccej"
    local cell_byocc_byr     "motheroccej race"
    local cell_byocc_byr_bys "motheroccej race south_merge"

foreach lvl in byocc byocc_byr byocc_byr_bys {

    local cell "`cell_`lvl''"

    preserve

        * Weighted (slwt) mean of each income measure plus the raw number of
        * observations in the cell
        collapse (rawsum) number (mean) $income_measures [pw=slwt], by(`cell')

        * Tag each average with the level it was computed at
        foreach m of global income_measures {
            rename `m' avg_`m'_1950_`lvl'
            label variable avg_`m'_1950_`lvl' "Coarse (mother) income score, average, 1950 using `m'"
        }

        tempfile collapsed
        save `collapsed'

        * Merge onto the full template so that cells absent from the
        * microdata are still present (with a missing average)
        use `template_`lvl''
        merge 1:1 `cell' using `collapsed', nogenerate

        * Cells that came from the template only have no observations
        replace number = 0 if number == .
        rename number number_1950obs_`lvl'
        label variable number_1950obs_`lvl' "Number of obs in cell"

        tempfile incomescores_`lvl'
        save `incomescores_`lvl''

    restore

}

*------------------------------------------------------------------------------------*
*------------------------------------------------------------------------------------*

***************************
*** IMPUTATIONS (INCTOT)
***************************

/*Note: Will impute any missing occ x race x south cells 
        with average value of less granular level. */

    * Level suffixes, ordered from coarsest (1) to finest (3)
    local name1 "byocc"
    local name2 "byocc_byr"
    local name3 "byocc_byr_bys"

    * Start from the finest file (most cells) and attach the coarser averages
    * to every cell that shares the same occupation (x race)
    use `incomescores_byocc_byr_bys', clear
    merge m:1 motheroccej race using `incomescores_byocc_byr', nogenerate
    merge m:1 motheroccej using `incomescores_byocc', nogenerate

    * Cascade the imputation: first fill occ x race from occ, then fill
    * occ x race x south from the (already filled) occ x race value.
    * count reports how many cells are missing before each fill.
    forvalues k = 2/3 {

        local j = `k' - 1
        local fine   "`name`k''"
        local coarse "`name`j''"

        foreach m of global income_measures {
            count if avg_`m'_1950_`fine' == .
            replace avg_`m'_1950_`fine' = avg_`m'_1950_`coarse' if avg_`m'_1950_`fine' == .
        }

    }

***********
* SAVE
***********

    foreach x in byocc_byr_bys {

    if "`x'"=="byocc_byr_bys" local cell "motheroccej race south_merge"

    preserve

    * Keep one row per cell and only the cell identifiers, the observation
    * count and the average(s) computed at this level
    bysort `cell': keep if _n==1
    keep number_1950obs_`x' `cell' avg*`x'

    * Show which occupations are still missing after the cascade above
    tab motheroccej if avg_inctot_1950_`x'==.

    /* Last step: impute occ x race x south income when average income is
       missing at all levels, using a donor occupation in the same
       race x region cell:
         (1) lawyers and engineers (occs 7 and 8) <- professionals (occ 17)
         (2) outside sales workers (occ 36)       <- inside sales workers (occ 35)
       The i-th entry of `recipients' is paired with the i-th entry of
       `donors'. */
    local recipients "7 8 36"
    local donors     "17 17 35"

    forvalues i = 1/3 {

        local y     : word `i' of `recipients'
        local donor : word `i' of `donors'

        foreach race in 1 2 {
            foreach reg in 0 1 {

            * After the keep above there is one row per cell, so the mean
            * is simply the donor cell's score
            sum avg_inctot_1950_`x' if motheroccej==`donor' & race==`race' & south_merge==`reg'

            * Only fill cells that are still missing, so that a score
            * already available for the recipient is never overwritten.
            * r(mean) is used directly rather than through macro expansion;
            * if the donor cell is itself missing the recipient stays
            * missing and the assert below stops the run.
            replace avg_inctot_1950_`x' = r(mean) if motheroccej==`y' & race==`race' & south_merge==`reg' & avg_inctot_1950_`x'==.

            }
        }
    }

    * Every cell must have an income score before the file is written
    assert avg_inctot_1950_`x'!=.

    compress
    save ./output/incomescores_mothers1950_`x'.dta, replace

    restore

    }
